Generic Mapping types


In [5]:
from collections import abc
my_dict = {}

In [6]:
isinstance(my_dict, abc.Mapping)


Out[6]:
True

In [7]:
tt = (1, 2, (30, 40))
hash(tt)


Out[7]:
8027212646858338501

In [8]:
tl = (1, 2, [30, 40])
hash(tl)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-d2eaad7bd112> in <module>()
      1 tl = (1, 2, [30, 40])
----> 2 hash(tl)

TypeError: unhashable type: 'list'

In [9]:
tf = (1, 2, frozenset([30, 40]))
hash(tf)


Out[9]:
-4118419923444501110

In [12]:
# Five ways of building a dict with the same three items; only the spelling
# (and the order in which the keys are given) differs.
a = dict(one=1, two=2, three=3)  # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}  # dict literal
c = dict(zip(['one', 'two', 'three'],[1, 2, 3]))  # pairs produced by zip() from two parallel lists
d = dict([('two', 2), ('one', 1), ('three', 3)])  # list of (key, value) tuples
e = dict({'three': 3, 'one': 1, 'two': 2})  # built from another dict

In [13]:
a == b == c == d == e


Out[13]:
True

dict Comprehensions


In [22]:
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'), 
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

In [23]:
# Swap each (code, country) pair so the resulting dict maps country -> code.
country_code = {country: code for code, country in DIAL_CODES}

In [24]:
country_code


Out[24]:
{'Bangladesh': 880,
 'Brazil': 55,
 'China': 86,
 'India': 91,
 'Indonesia': 62,
 'Japan': 81,
 'Nigeria': 234,
 'Pakistan': 92,
 'Russia': 7,
 'United States': 1}

In [26]:
# Reverse the mapping again (code -> upper-cased country), keeping only codes below 66.
{code: country.upper() for country, code in country_code.items() if code < 66}


Out[26]:
{1: 'UNITED STATES', 7: 'RUSSIA', 55: 'BRAZIL', 62: 'INDONESIA'}

In [34]:
a.values()


Out[34]:
dict_values([3, 2, 1])

Handling Missing Keys with setdefault


In [35]:
import sys
import re

In [36]:
WORD_RE = re.compile(r'\w+')

In [40]:
# Build an index mapping each word matched by WORD_RE to the list of
# (line_no, column_no) positions where it occurs in zen.txt.
index = {}
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1  # match.start() is 0-based; the stored column is 1-based
            location = (line_no, column_no)
            # setdefault returns the list already stored under `word`, or first
            # stores a new empty list and returns that, so one call covers both cases.
            index.setdefault(word, []).append(location)
# Longer equivalent using dict.get, left commented out for comparison:
#             occurrences = index.get(word, [])
#             occurrences.append(location)
#             index[word] = occurrences

In [47]:
# for word in sorted(index, key=str.upper):
#     print(word, index[word])

Mappings with Flexible Key Lookup

defaultdict: Another Take on Missing Keys


In [51]:
import sys
import re
import collections

In [52]:
WORD_RE = re.compile(r'\w+')

In [53]:
# Same word index as the setdefault version, but a defaultdict(list) creates
# the empty list automatically the first time a missing word is looked up.
index = collections.defaultdict(list)
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1  # match.start() is 0-based; the stored column is 1-based
            location = (line_no, column_no)
            # index[word] never raises KeyError here: a missing word gets list() as its value
            index[word].append(location)

In [55]:
# for word in sorted(index, key=str.upper):
#     print(word, index[word])

The `__missing__` Method


In [70]:
class StrKeyDict0(dict):
    """dict whose lookups fall back to the ``str`` form of a missing key.

    Keys are stored exactly as given; only ``d[key]``, ``d.get(key)`` and
    ``key in d`` retry with ``str(key)`` when the key itself is absent.
    """

    def __missing__(self, key):
        """Hook invoked by ``dict.__getitem__`` when ``key`` is not found.

        A non-str key is retried as ``str(key)``; a str key that is missing
        raises ``KeyError``, which also ends the retry.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` if that lookup raises ``KeyError``."""
        # Going through self[...] lets __missing__ take part in the lookup.
        try:
            value = self[key]
        except KeyError:
            return default
        return value

    def __contains__(self, key):
        """Report whether ``key`` or ``str(key)`` is one of the stored keys."""
        # Test against self.keys() rather than `key in self`, which would
        # call this method again.
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored

In [72]:
# Tests for item retrieval using `d[key]` notation
d = StrKeyDict0([('2', 'two'), ('4', 'four')])

In [62]:
d['2']


Out[62]:
'two'

In [63]:
d[4]


Out[63]:
'four'

In [64]:
d[1]


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-64-6ee4d8336f71> in <module>()
----> 1 d[1]

<ipython-input-59-69d5e491f534> in __missing__(self, key)
      4         if isinstance(key, str):
      5             raise KeyError(key)
----> 6         return self[str(key)]
      7 
      8     def get(self, key, default=None):

<ipython-input-59-69d5e491f534> in __missing__(self, key)
      3     def __missing__(self, key):
      4         if isinstance(key, str):
----> 5             raise KeyError(key)
      6         return self[str(key)]
      7 

KeyError: '1'

In [65]:
d.get('2')


Out[65]:
'two'

In [66]:
d.get(4)


Out[66]:
'four'

In [67]:
d.get(1, 'N/A')


Out[67]:
'N/A'

In [74]:
2 in d


Out[74]:
True

In [75]:
1 in d


Out[75]:
False

Variations of dict


In [7]:
import builtins
from collections import ChainMap
import collections

In [5]:
# ChainMap searches its mappings in the order given: locals() first, then
# globals(), then the namespace of the builtins module.
pylookup = ChainMap(locals(), globals(), vars(builtins))

In [8]:
ct = collections.Counter('abracadabra')
ct


Out[8]:
Counter({'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r': 2})

In [9]:
ct.update('aaaaazzz')

In [11]:
ct


Out[11]:
Counter({'a': 10, 'b': 2, 'c': 1, 'd': 1, 'r': 2, 'z': 3})

In [12]:
ct.most_common(2)


Out[12]:
[('a', 10), ('z', 3)]

Subclassing UserDict


In [13]:
import collections

class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as ``str`` and accepts non-str keys on lookup.

    Items are kept in ``self.data``, the plain dict maintained by ``UserDict``.
    """

    def __missing__(self, key):
        """Retry a failed lookup with ``str(key)``.

        ``UserDict.__getitem__`` calls this hook when ``key`` is not in
        ``self.data``. The name needs the trailing double underscore: a method
        called ``__missing`` is name-mangled inside the class body and is
        never found by ``UserDict``.

        Raises:
            KeyError: if ``key`` is already a ``str``, so there is nothing
                left to retry.
        """
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        """Report whether ``str(key)`` is a stored key."""
        # All keys are stored as str (see __setitem__), so one check suffices.
        return str(key) in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        self.data[str(key)] = item

Immutable Mappings


In [14]:
from types import MappingProxyType

In [15]:
d = {1: 'A'}
d_proxy = MappingProxyType(d)
d_proxy


Out[15]:
mappingproxy({1: 'A'})

In [16]:
d_proxy[1]


Out[16]:
'A'

In [17]:
d_proxy[2] = 'x'


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-2515ac52334f> in <module>()
----> 1 d_proxy[2] = 'x'

TypeError: 'mappingproxy' object does not support item assignment

In [18]:
d[2] = 'B'

In [19]:
d_proxy


Out[19]:
mappingproxy({1: 'A', 2: 'B'})

In [20]:
d_proxy[2]


Out[20]:
'B'

Set Theory


In [21]:
l = ['spam', 'spam', 'eggs', 'spam']
set(l)


Out[21]:
{'eggs', 'spam'}

In [22]:
list(set(l))


Out[22]:
['spam', 'eggs']

In [24]:
# Two sets of ints used by the membership-counting examples.
haystack = {1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 26, 29, 30}
needles = {0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31}

In [27]:
found = len(needles & haystack)
found


Out[27]:
6

In [29]:
# Count the needles that also occur in haystack with an explicit loop.
found = 0
for n in needles:
    if n in haystack:
        found +=1
found  # bare expression so the notebook displays the count


Out[29]:
6

In [30]:
found = len(needles.intersection(haystack))
found


Out[30]:
6

set Literals


In [35]:
s = {1}
type(s)


Out[35]:
set

In [36]:
s


Out[36]:
{1}

In [37]:
s.pop()


Out[37]:
1

In [38]:
s


Out[38]:
set()

In [39]:
from dis import dis
dis('{1}')


  1           0 LOAD_CONST               0 (1)
              3 BUILD_SET                1
              6 RETURN_VALUE

In [40]:
dis('set([1])')


  1           0 LOAD_NAME                0 (set)
              3 LOAD_CONST               0 (1)
              6 BUILD_LIST               1
              9 CALL_FUNCTION            1 (1 positional, 0 keyword pair)
             12 RETURN_VALUE

In [41]:
frozenset(range(10))


Out[41]:
frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

Set Comprehensions


In [57]:
from unicodedata import name
# Set of the characters with code points 32-255 whose Unicode name contains 'SIGN'.
# The '' default makes name() return an empty string instead of raising
# ValueError for a character that has no name.
{chr(i) for i in range(32,256) if 'SIGN' in name(chr(i),'')}


Out[57]:
{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

In [55]:
name(chr(172),'')


Out[55]:
'NOT SIGN'

dict and set Under the Hood

Practical Consequences of How dict Works

Key ordering depends on insertion order


In [58]:
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'), 
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

In [60]:
# dict built from the (code, country) pairs in the order DIAL_CODES lists them
d1 = dict(DIAL_CODES)
print('d1:', d1.keys())


d1: dict_keys([880, 1, 86, 55, 7, 234, 91, 92, 62, 81])

In [61]:
# dict built from the DIAL_CODES pairs after sorting them; tuples compare by
# their first item, so the pairs are inserted in dial-code order
d2 = dict(sorted(DIAL_CODES))
print('d2:', d2.keys())


d2: dict_keys([880, 1, 91, 86, 81, 55, 234, 7, 92, 62])

In [62]:
# dict built from the DIAL_CODES pairs sorted by x[1], the country name
d3 = dict(sorted(DIAL_CODES, key=lambda x:x[1]))
print('d3:', d3.keys())


d3: dict_keys([880, 81, 1, 86, 55, 7, 234, 91, 92, 62])

In [63]:
assert d1 == d2 and d2 == d3

In [ ]:


In [ ]: